#include "linux/kernel.h"
#include "linux/interrupt.h"
#include "linux/timer.h"
#include "linux/module.h"
#include "linux/delay.h"
#include "lwip/netif.h"
#include "lwip/netifapi.h"
#include "los_task.h"
#include "los_list.h"
#include "sys/bus.h"

/*
 * Cache maintenance primitives provided outside this file. Both take a
 * [start, end) address range; they are wrapped by clean_cache() and
 * inv_cache() below.
 */
extern void arm_clean_cache_range(UINTPTR start, UINTPTR end);
extern void arm_inv_cache_range(UINTPTR start, UINTPTR end);

/*
 * Clean the data cache for the `size` bytes starting at `ptr` by forwarding
 * the range [ptr, ptr + size) to arm_clean_cache_range().
 */
static void clean_cache(const void *ptr, uint32_t size)
{
    const uintptr_t first = (uintptr_t)ptr;

    arm_clean_cache_range(first, first + size);
}

/*
 * Invalidate the data cache for the `size` bytes starting at `ptr` by
 * forwarding the range [ptr, ptr + size) to arm_inv_cache_range().
 */
static void inv_cache(const void *ptr, uint32_t size)
{
    const uintptr_t first = (uintptr_t)ptr;

    arm_inv_cache_range(first, first + size);
}

/* Name used for both the IRQ registration and the worker task. */
#define SUN8I_ETH_DRIVER_NAME "sun8i-eth"

/* Bit fields of the value written to EMAC_MII_CMD by the MDIO helpers. */
#define MDIO_CMD_MII_BUSY		BIT(0)
#define MDIO_CMD_MII_WRITE		BIT(1)

#define MDIO_CMD_MII_PHY_REG_ADDR_MASK	0x000001f0
#define MDIO_CMD_MII_PHY_REG_ADDR_SHIFT	4
#define MDIO_CMD_MII_PHY_ADDR_MASK	0x0001f000
#define MDIO_CMD_MII_PHY_ADDR_SHIFT	12

/* Alignment applied to DMA descriptors and buffers. */
#define EMAC_DMA_ALIGN          CACHE_ALIGNED_SIZE
/* Size in bytes of each TX/RX DMA buffer. */
#define EMAC_DMA_ETH_BUFSIZE    2048
/* Largest pbuf tot_len accepted for TX; also OR'd into each RX descriptor's st field. */
#define EMAC_DMA_ALLOW_SIZE_MAX 2044
/* Number of descriptors (and buffers) in the TX and RX rings. */
#define EMAC_DMA_TX_BUFNUM      5
#define EMAC_DMA_RX_BUFNUM      5

/* Size of the register window passed to ioremap(). */
#define EMAC_SIZE 0x200


/* EMAC clock register bits (not referenced in the visible part of this file). */
#define EMAC_CLK_RMII_EN        BIT(13)
#define EMAC_CLK_EPIT           BIT(2)
#define EMAC_CLK_ETCS_EXT_GMII  0x1
#define EMAC_CLK_ETCS_INT_GMII  0x2

/* EMAC register byte offsets, added to emac->base by emac_read()/emac_write(). */
#define EMAC_CTL0               0x00
#define EMAC_CTL1               0x04
#define EMAC_INT_STA            0x08
#define EMAC_INT_EN             0x0c
#define EMAC_TX_CTL0            0x10
#define EMAC_TX_CTL1            0x14
#define EMAC_TX_FLOW_CTL        0x1c
#define EMAC_TX_DMA_DESC        0x20
#define EMAC_RX_CTL0            0x24
#define EMAC_RX_CTL1            0x28
#define EMAC_RX_DMA_DESC        0x34
#define EMAC_MII_CMD            0x48
#define EMAC_MII_DATA           0x4c
#define EMAC_ADDR0_HIGH         0x50
#define EMAC_ADDR0_LOW          0x54
#define EMAC_TX_DMA_STA         0xb0
#define EMAC_TX_CUR_DESC        0xb4
#define EMAC_TX_CUR_BUF         0xb8
#define EMAC_RX_DMA_STA         0xc0
#define EMAC_RX_CUR_DESC        0xc4

/* RX interrupt bit, tested in EMAC_INT_STA and toggled in EMAC_INT_EN. */
#define RX_INT	BIT(8)

/* PHY register numbers and bits used through the MDIO helpers. */
#define BMCR                    0x0000
#define BMCR_RST                BIT(15)
#define BMCR_AUTONEG            BIT(12)
#define BMSR                    0x0001
#define BMSR_LINK               BIT(2)
#define BMSR_NEGCOMP            BIT(5)
#define BMSR_100M_FD            BIT(14)
#define BMSR_100M_HD            BIT(13)
#define BMSR_10M_FD             BIT(12)
#define BMSR_10M_HD             BIT(11)        

/* Translate a CPU virtual address into the address handed to the DMA engine. */
#define virt_to_phys(addr) VMM_TO_DMA_ADDR(addr)

/* Static board resources describing one EMAC instance. */
struct sun8i_emac_res
{
    uintptr_t base;                 /* register base address passed to ioremap() */
    uintptr_t emac_clk;             /* clock register address (not used in the visible code) */
    int irq;                        /* interrupt number passed to request_irq() */
    uint8_t phy;                    /* PHY address used for MDIO transactions */
    uint8_t mac[ETHARP_HWADDR_LEN]; /* default MAC address copied into the netif */
};

/*
 * One DMA descriptor shared with the EMAC hardware.
 * The driver treats BIT(31) of `status` as "descriptor currently handed to
 * the hardware"; on RX the frame length is read from bits 16..29 of `status`.
 */
struct sun8i_emac_dma_desc {
	uint32_t status;	/* ownership bit and, on RX, the received length */
	uint32_t st;		/* buffer size / control flags written by the driver */
	uint32_t addr;		/* DMA address of the data buffer */
	uint32_t next;		/* DMA address of the next descriptor in the ring */
} __attribute__((aligned (EMAC_DMA_ALIGN)));

/*
 * All DMA-visible memory of one EMAC: TX/RX descriptor rings and the data
 * buffers they point to, each aligned to EMAC_DMA_ALIGN.
 */
struct sun8i_emac_dma
{
    struct sun8i_emac_dma_desc txdescs[EMAC_DMA_TX_BUFNUM]  __attribute__((aligned (EMAC_DMA_ALIGN)));
    struct sun8i_emac_dma_desc rxdescs[EMAC_DMA_RX_BUFNUM]  __attribute__((aligned (EMAC_DMA_ALIGN)));
    uint8_t txbuf[EMAC_DMA_TX_BUFNUM][EMAC_DMA_ETH_BUFSIZE] __attribute__((aligned (EMAC_DMA_ALIGN)));
    uint8_t rxbuf[EMAC_DMA_RX_BUFNUM][EMAC_DMA_ETH_BUFSIZE] __attribute__((aligned (EMAC_DMA_ALIGN)));
};

/*
 * Driver state for one EMAC instance.
 * `netif` must stay the first member: the lwIP callbacks cast the
 * `struct netif *` they receive straight to `struct sun8i_emac *`.
 */
struct sun8i_emac
{
    struct netif netif;         /* lwIP interface; must be first (see above) */
    struct sun8i_emac_res res;  /* static board resources */
    struct sun8i_emac_dma dma;  /* descriptor rings and buffers */
    uintptr_t base;             /* register base returned by ioremap() */
    uint16_t txpos;             /* index of the next TX descriptor to fill */
    uint16_t rxpos;             /* index of the next RX descriptor to examine */

	EVENT_CB_S rx_event;        /* written by the ISR, read by the worker task */
    uint32_t task_id;           /* worker task id filled in by LOS_TaskCreate() */
    uint8_t link_status;        /* last applied LINK_* flag combination */
};


/* Flags combined into sun8i_emac.link_status by emac_update_link_status(). */
#define LINK_ON             0x1
#define LINK_SPEED_100      0x2
#define LINK_FULL_DUPLEX    0x4

/*
 * The single EMAC instance handled by this driver. Only the board resources
 * are initialised here; the remaining fields start out zeroed.
 */
static struct sun8i_emac emac0 =
{
    .res = 
    {
        .base = 0x01c30000,     /* register base, mapped with ioremap() in emac_init() */
        .emac_clk = 0x01c00030, /* not referenced in the visible code */
        .irq = 82 + 32,
        .phy = 0,               /* PHY address on the MDIO bus */
        .mac= {0x48, 0x89, 0xe7, 0xc8, 0x61, 0x8b},
    },
};


/*
 * Forward declarations of the register accessors defined further down, so
 * the lwIP callbacks that follow can use them.
 */
static uint32_t emac_read(struct sun8i_emac *emac, int reg);
static void emac_write(struct sun8i_emac *emac, int reg, int val);
static void emac_setbits(struct sun8i_emac *emac, int reg, uint32_t bits);
static void emac_clearbits(struct sun8i_emac *emac, int reg, uint32_t bits);

/*
 * Program a MAC address into the EMAC address-0 registers.
 *
 * netif: interface embedded at the start of a struct sun8i_emac.
 * mac:   address bytes; mac[0..3] are packed little-endian into
 *        EMAC_ADDR0_LOW and mac[4..5] into EMAC_ADDR0_HIGH.
 * len:   number of bytes in `mac`; must equal ETHARP_HWADDR_LEN.
 *
 * Returns 0 on success, 1 when `len` is not ETHARP_HWADDR_LEN.
 */
static u8_t emac_netif_set_hwaddr(struct netif *netif, u8_t *mac, u8_t len)
{
    struct sun8i_emac *emac = (struct sun8i_emac*)netif;
    uint32_t macid_lo, macid_hi;

    if (len != ETHARP_HWADDR_LEN)
    {
        return 1;
    }

    /*
     * Widen each byte to uint32_t before shifting: a u8_t promotes to a
     * signed int, and shifting a value >= 0x80 left by 24 would overflow it.
     */
    macid_lo = (uint32_t)mac[0]
             | ((uint32_t)mac[1] << 8)
             | ((uint32_t)mac[2] << 16)
             | ((uint32_t)mac[3] << 24);
    macid_hi = (uint32_t)mac[4]
             | ((uint32_t)mac[5] << 8);

    emac_write(emac, EMAC_ADDR0_HIGH, macid_hi);
    emac_write(emac, EMAC_ADDR0_LOW, macid_lo);
    return 0;
}

/*
 * lwIP transmit hook: copy the pbuf chain `p` into the next TX DMA buffer
 * and hand the matching descriptor to the hardware.
 *
 * The frame is dropped (with a log message) when it is larger than
 * EMAC_DMA_ALLOW_SIZE_MAX or when the next descriptor is still owned by the
 * hardware (ring full).
 */
static void emac_netif_send(struct netif *netif, struct pbuf *p)
{
    if (p->tot_len > EMAC_DMA_ALLOW_SIZE_MAX)
    {
        dprintf("%s: tot_len too large\n", __func__);
        return;
    }

    struct sun8i_emac *emac = (struct sun8i_emac*)netif;
    struct sun8i_emac_dma *dma = &emac->dma;

    int txpos = emac->txpos;
    struct sun8i_emac_dma_desc *txdesc = &dma->txdescs[txpos];

    /* Drop any stale cached copy so the ownership bit is read from memory. */
    inv_cache(txdesc, sizeof(struct sun8i_emac_dma_desc));
    if (txdesc->status & BIT(31))
    {
        dprintf("%s: emac dma queue is full\n", __func__);
        return;
    }
    emac->txpos = (txpos + 1) % EMAC_DMA_TX_BUFNUM;

    uint8_t *dma_data = (uint8_t*)dma->txbuf[txpos];

    /* Flatten the pbuf chain into the contiguous DMA buffer. */
    for (struct pbuf *it = p; it != NULL; it = it->next)
    {
        memcpy(dma_data, it->payload, it->len);
        dma_data += it->len;
    }
    /* Push the payload to memory before the descriptor is published. */
    clean_cache(dma->txbuf[txpos], sizeof(dma->txbuf[txpos]));

    /*
     * Length plus control flags. NOTE(review): BIT(24)/BIT(29)/BIT(30)/BIT(31)
     * are presumably the chain/first/last/interrupt flags - confirm against
     * the EMAC datasheet.
     */
    txdesc->st = p->tot_len | BIT(24) | BIT(29) | BIT(30) | BIT(31);
    txdesc->status = BIT(31);

    /*
     * The descriptor was just written by the CPU, so it has to be cleaned
     * (written back) for the DMA engine to see it. Invalidating here could
     * throw the update away.
     */
    clean_cache(txdesc, sizeof(struct sun8i_emac_dma_desc));

    /* Kick the TX DMA so it picks up the new descriptor. */
    emac_setbits(emac, EMAC_TX_CTL1, BIT(30) | BIT(31));
}

/* Return the value of the EMAC register located `reg` bytes past emac->base. */
static uint32_t emac_read(struct sun8i_emac *emac, int reg)
{
    const uintptr_t reg_addr = emac->base + reg;

    return readl(reg_addr);
}

/* Store `val` into the EMAC register located `reg` bytes past emac->base. */
static void emac_write(struct sun8i_emac *emac, int reg, int val)
{
    const uintptr_t reg_addr = emac->base + reg;

    writel(val, reg_addr);
}

/*
 * Read-modify-write helper: OR `bits` into the EMAC register at offset `reg`.
 */
static void emac_setbits(struct sun8i_emac *emac, int reg, uint32_t bits)
{
    const uintptr_t reg_addr = emac->base + reg;
    uintptr_t updated = readl(reg_addr);

    updated = updated | bits;
    writel(updated, reg_addr);
}

/*
 * Read-modify-write helper: clear `bits` in the EMAC register at offset `reg`.
 */
static void emac_clearbits(struct sun8i_emac *emac, int reg, uint32_t bits)
{
    const uintptr_t reg_addr = emac->base + reg;
    uintptr_t updated = readl(reg_addr);

    updated = updated & ~bits;
    writel(updated, reg_addr);
}

#if 1
/*
 * Read PHY register `reg` of the PHY at MDIO address `addr`.
 *
 * Issues the command through EMAC_MII_CMD and polls the busy bit up to 1000
 * times, 10 us apart. Returns the content of EMAC_MII_DATA on completion, or
 * -1 (after logging) when the busy bit never clears.
 */
static int emac_mdio_read(struct sun8i_emac *emac, int addr, int reg)
{
    unsigned int polls_left = 1000;
    uint32_t cmd;

    /* Read command: WRITE bit left clear, BUSY set to start the transfer. */
    cmd  = (reg << MDIO_CMD_MII_PHY_REG_ADDR_SHIFT) & MDIO_CMD_MII_PHY_REG_ADDR_MASK;
    cmd |= (addr << MDIO_CMD_MII_PHY_ADDR_SHIFT) & MDIO_CMD_MII_PHY_ADDR_MASK;
    cmd |= MDIO_CMD_MII_BUSY;

    emac_write(emac, EMAC_MII_CMD, cmd);

    for (;;)
    {
        if ((emac_read(emac, EMAC_MII_CMD) & MDIO_CMD_MII_BUSY) == 0)
        {
            return (int)emac_read(emac, EMAC_MII_DATA);
        }

        polls_left--;
        if (polls_left == 0)
        {
            dprintf("%s: timeout\n", __func__);
            return -1;
        }
        udelay(10);
    }
}

/*
 * Write `val` to PHY register `reg` of the PHY at MDIO address `addr`.
 *
 * Loads EMAC_MII_DATA, issues the command through EMAC_MII_CMD and polls the
 * busy bit up to 1000 times, 10 us apart. Returns 0 on completion, or -1
 * (after logging) when the busy bit never clears.
 */
static int emac_mdio_write(struct sun8i_emac *emac, int addr, int reg, uint16_t val)
{
    unsigned int polls_left = 1000;
    uint32_t cmd;

    cmd  = (reg << MDIO_CMD_MII_PHY_REG_ADDR_SHIFT) & MDIO_CMD_MII_PHY_REG_ADDR_MASK;
    cmd |= (addr << MDIO_CMD_MII_PHY_ADDR_SHIFT) & MDIO_CMD_MII_PHY_ADDR_MASK;
    cmd |= MDIO_CMD_MII_WRITE | MDIO_CMD_MII_BUSY;

    /* The data register must be loaded before the command is started. */
    emac_write(emac, EMAC_MII_DATA, val);
    emac_write(emac, EMAC_MII_CMD,  cmd);

    for (;;)
    {
        if ((emac_read(emac, EMAC_MII_CMD) & MDIO_CMD_MII_BUSY) == 0)
        {
            return 0;
        }

        polls_left--;
        if (polls_left == 0)
        {
            dprintf("%s: timeout\n", __func__);
            return -1;
        }
        udelay(10);
    }
}

#endif

/*
 * Bring the MAC and its DMA engines into the running state.
 *
 * Waits (bounded to 1000 register reads) for bit 0 of EMAC_CTL1 to read as
 * zero, configures the TX/RX mode bits and EMAC_CTL1, then starts both DMA
 * engines and enables the receiver and transmitter.
 */
static void emac_start(struct sun8i_emac *emac)
{
    uint32_t spins_left = 1000;

    /* Busy-wait while bit 0 of EMAC_CTL1 is still set. */
    while ((emac_read(emac, EMAC_CTL1) & 0x1) != 0)
    {
        spins_left--;
        if (spins_left == 0)
        {
            dprintf("%s: wait emac reset timeout\n", __func__);
            break;
        }
    }

    /* TX_MD: transmission starts once a full frame is in the TX DMA FIFO. */
    emac_setbits(emac, EMAC_TX_CTL1, BIT(1));
    emac_setbits(emac, EMAC_RX_CTL1, BIT(1));

    emac_write(emac, EMAC_CTL1, 8 << 24);

    /* Link speed/duplex is applied later by emac_update_link_status(). */

    /* Start the RX DMA, then the TX DMA. */
    emac_setbits(emac, EMAC_RX_CTL1, BIT(30));
    emac_setbits(emac, EMAC_TX_CTL1, BIT(30));

    /* Enable the receiver, then the transmitter. */
    emac_setbits(emac, EMAC_RX_CTL0, BIT(31));
    emac_setbits(emac, EMAC_TX_CTL0, BIT(31));
}

/*
 * Link `num` descriptors into a circular ring and attach one buffer to each.
 *
 * descs: descriptor array; descs[i].next is set to the DMA address of
 *        descs[(i + 1) % num], so the last entry points back to the first.
 * buf:   array of `num` buffers; buf[i] becomes the data buffer of descs[i].
 * num:   number of descriptors/buffers.
 *
 * Only the `addr` and `next` fields are written here.
 */
static void dma_desc_init(struct sun8i_emac_dma_desc descs[], uint8_t buf[][EMAC_DMA_ETH_BUFSIZE], int num)
{
    for (int i = 0; i < num; i++)
    {
        descs[i].addr = virt_to_phys((uintptr_t)buf[i]);
        /* Cast through uintptr_t like `addr` above so the pointer is never truncated. */
        descs[i].next = virt_to_phys((uintptr_t)&descs[(i + 1) % num]);
    }
}

/*
 * Poll the PHY status register and, when the derived link state differs from
 * the cached one, reprogram the low four bits of EMAC_CTL0 and notify lwIP.
 *
 * The link counts as up only when both BMSR_LINK and BMSR_NEGCOMP are set;
 * speed and duplex are then derived from the BMSR_100M_* / BMSR_10M_* bits.
 * A failed MDIO read (negative value) is treated as link down.
 */
static void emac_update_link_status(struct sun8i_emac *emac)
{
    const int up_bits = BMSR_LINK | BMSR_NEGCOMP;
    uint8_t new_status = 0;
    int bmsr = emac_mdio_read(emac, (int)emac->res.phy, BMSR);

    if (bmsr >= 0 && (bmsr & up_bits) == up_bits)
    {
        new_status = LINK_ON;
        if (bmsr & BMSR_100M_FD)
        {
            new_status |= LINK_SPEED_100 | LINK_FULL_DUPLEX;
        }
        else if (bmsr & BMSR_100M_HD)
        {
            new_status |= LINK_SPEED_100;
        }
        else if (bmsr & BMSR_10M_FD)
        {
            new_status |= LINK_FULL_DUPLEX;
        }
    }

    /* Nothing to do while the state is unchanged. */
    if (new_status == emac->link_status)
    {
        return;
    }

    uint32_t ctl0 = emac_read(emac, EMAC_CTL0);
    ctl0 &= ~(0xf);

    if (new_status == 0)
    {
        netif_set_link_down(&emac->netif);
    }
    else
    {
        /* 0xC is written for 100M, 0x8 otherwise; bit 0 marks full duplex. */
        ctl0 |= (new_status & LINK_SPEED_100) ? 0xC : 0x8;
        if (new_status & LINK_FULL_DUPLEX)
        {
            ctl0 |= 0x1;
        }
        emac_write(emac, EMAC_CTL0, ctl0);
        netif_set_link_up(&emac->netif);
    }

    emac->link_status = new_status;
}

/*
 * Interrupt handler. On an RX interrupt it masks RX_INT, acknowledges it in
 * EMAC_INT_STA and wakes the worker task through rx_event; the worker
 * re-enables RX_INT once it has drained the ring. Other status bits are
 * ignored.
 *
 * irq:  interrupt number (unused).
 * data: the struct sun8i_emac registered with request_irq().
 */
static void emac_isr(int irq, void *data)
{
    struct sun8i_emac *emac = (struct sun8i_emac *)data;
    uint32_t pending = emac_read(emac, EMAC_INT_STA);

    if ((pending & RX_INT) == 0)
    {
        return;
    }

    emac_clearbits(emac, EMAC_INT_EN, RX_INT);    /* mask further RX interrupts */
    emac_write(emac, EMAC_INT_STA, RX_INT);       /* acknowledge the pending one */
    LOS_EventWrite(&emac->rx_event, 0x1);
}

/*
 * Copy one received frame of `len` bytes out of RX DMA buffer `rxpos` into a
 * newly allocated pbuf and pass it to the stack with driverif_input().
 *
 * The frame is dropped (with a log message) when `len` exceeds the DMA buffer
 * size or when no pbuf can be allocated.
 */
static void emac_rx_deliver(struct sun8i_emac *emac, int rxpos, uint16_t len)
{
    struct sun8i_emac_dma *dma = &emac->dma;

    /* The length field is 14 bits wide; never read past the 2048-byte buffer. */
    if (len > EMAC_DMA_ETH_BUFSIZE)
    {
        dprintf("%s: bad frame length %u, dropped\n", __func__, (unsigned int)len);
        return;
    }

    /* Make sure the CPU sees what the DMA engine wrote. */
    inv_cache(dma->rxbuf[rxpos], EMAC_DMA_ETH_BUFSIZE);

    uint16_t alloc_len = len;
#if ETH_PAD_SIZE
    alloc_len += ETH_PAD_SIZE;
#endif

    if (alloc_len < EMAC_DMA_ETH_BUFSIZE)
        alloc_len = EMAC_DMA_ETH_BUFSIZE;

    struct pbuf *pbuf = pbuf_alloc(PBUF_RAW, alloc_len, PBUF_RAM);
    if (pbuf == NULL)
    {
        dprintf("%s: pbuf_alloc failed, frame dropped\n", __func__);
        return;
    }
    pbuf->len = len;
    pbuf->tot_len = len;

#if ETH_PAD_SIZE
    pbuf_header(pbuf, -ETH_PAD_SIZE);
#endif
    memcpy(pbuf->payload, dma->rxbuf[rxpos], len);

#if ETH_PAD_SIZE
    pbuf_header(pbuf, ETH_PAD_SIZE);
#endif

    driverif_input(&emac->netif, pbuf);
}

/*
 * Worker task entry: waits for the RX event (or a timeout of a tenth of a
 * second), drains every RX descriptor the hardware has released, re-enables
 * the RX interrupt and finally refreshes the link status.
 *
 * NOTE(review): LOS_EventRead() returns an error code on timeout; confirm
 * whether that code can have bit 0 set, in which case the ring is also
 * polled on every timeout.
 */
static void emac_work_thread(struct sun8i_emac *emac)
{
    struct sun8i_emac_dma *dma = &emac->dma;

    while (1)
    {
        uint32_t event = LOS_EventRead(&(emac->rx_event), 0x1, LOS_WAITMODE_OR | LOS_WAITMODE_CLR, LOSCFG_BASE_CORE_TICK_PER_SECOND / 10);

        if (event & 0x1)
        {
            int rxpos = emac->rxpos;
            while (1)
            {
                struct sun8i_emac_dma_desc *rxdesc = &dma->rxdescs[rxpos];
                inv_cache(rxdesc, sizeof(struct sun8i_emac_dma_desc));
                if ((rxdesc->status & BIT(31)) != 0)
                {
                    /* Still owned by the hardware: the ring is drained. */
                    break;
                }

                uint16_t len = (rxdesc->status >> 16) & 0x3FFF;
                emac_rx_deliver(emac, rxpos, len);

                /* Return the descriptor to the hardware even if the frame was dropped. */
                rxdesc->status |= BIT(31);
                clean_cache(rxdesc, sizeof(struct sun8i_emac_dma_desc));
                rxpos = (rxpos + 1) % EMAC_DMA_RX_BUFNUM;
            }
            emac->rxpos = rxpos;
            /* The ISR masked RX_INT; unmask it now that the ring is empty. */
            emac_setbits(emac, EMAC_INT_EN, RX_INT);
        }
        emac_update_link_status(emac);
    }
}

/*
 * Register emac_isr() for the EMAC interrupt line, enable that line and then
 * unmask the RX interrupt inside the EMAC.
 */
static void emac_interrupt_init(struct sun8i_emac *emac)
{
    const int irq_line = emac->res.irq;

    request_irq(irq_line, (irq_handler_t)emac_isr, 0, SUN8I_ETH_DRIVER_NAME, (void *)emac);
    enable_irq(irq_line);

    /* Let the hardware raise RX interrupts from now on. */
    emac_setbits(emac, EMAC_INT_EN, RX_INT);
}

static void emac_start_work_thread(struct sun8i_emac *emac)
{
    TSK_INIT_PARAM_S task_param = 
    {
        .pfnTaskEntry = (TSK_ENTRY_FUNC)emac_work_thread,
        .auwArgs[0] = (uintptr_t)emac,
        .uwStackSize = 0x20000,
        .pcName = SUN8I_ETH_DRIVER_NAME,
        .usTaskPrio = 3,
    };
	LOS_EventInit(&emac->rx_event);
    LOS_TaskCreate(&emac->task_id, &task_param);
}

/*
 * Reset the PHY by writing BMCR_RST to BMCR, then poll BMCR until the bit
 * reads back as zero. Gives up after 999 polls and logs a timeout; failed
 * MDIO reads count as a poll and are retried.
 */
static void emac_reset_phy(struct sun8i_emac *emac)
{
    const int phy = (int)emac->res.phy;

    emac_mdio_write(emac, phy, BMCR, BMCR_RST);

    for (unsigned polls_left = 999; polls_left != 0; polls_left--)
    {
        int bmcr = emac_mdio_read(emac, phy, BMCR);

        if (bmcr >= 0 && (bmcr & BMCR_RST) == 0)
        {
            return;
        }
    }
    dprintf("%s: timeout\n", __func__);
}

static void emac_init(struct sun8i_emac *emac)
{
    emac->base = (uintptr_t)ioremap(emac->res.base, EMAC_SIZE);
    // reset phy
    emac_reset_phy(emac);

	emac_write(emac, EMAC_CTL0, 0x0d);// magic code enable 100mbps duplix internal phy

    struct sun8i_emac_dma *dma = &emac->dma;

	dma_desc_init(dma->txdescs, dma->txbuf, EMAC_DMA_TX_BUFNUM);
    dma_desc_init(dma->rxdescs, dma->rxbuf, EMAC_DMA_RX_BUFNUM);

    for (int i = 0; i < EMAC_DMA_TX_BUFNUM; i++)
    {
        dma->txdescs[i].st = 0;
        dma->txdescs[i].status = BIT(31);
    }

    for (int i = 0; i < EMAC_DMA_RX_BUFNUM; i++)
    {
        dma->rxdescs[i].st |= EMAC_DMA_ALLOW_SIZE_MAX;
        dma->rxdescs[i].status = BIT(31);
    }

    clean_cache(dma, sizeof(struct sun8i_emac_dma));

    emac_write(emac, EMAC_TX_DMA_DESC, virt_to_phys((uintptr_t)&(dma->txdescs)));
    emac_write(emac, EMAC_RX_DMA_DESC, virt_to_phys((uintptr_t)&(dma->rxdescs)));

    emac_start(emac);
    emac_start_work_thread(emac);
    emac_interrupt_init(emac);

    struct netif *netif = &emac->netif;
    ip4_addr_t zero;

    memset(&zero, 0, sizeof(zero));
    netif->drv_send = emac_netif_send;
    netif->drv_set_hwaddr = emac_netif_set_hwaddr;
#if PF_PKT_SUPPORT
    netif->drv_config = NULL;
#endif
#if LWIP_NETIF_ETHTOOL
    netif->ethtool_ops = NULL;
#endif
    netif->link_layer_type = ETHERNET_DRIVER_IF;
    netif->hwaddr_len = ETHARP_HWADDR_LEN;
    memcpy(netif->hwaddr, emac->res.mac, ETHARP_HWADDR_LEN);
    emac_netif_set_hwaddr(netif, netif->hwaddr, ETHARP_HWADDR_LEN);
    
    netif_add(netif, &zero, &zero, &zero);
#if LWIP_TX_CSUM_OFFLOAD
    netif->flags |= NETIF_FLAG_DRIVER_CSUM_SUPPORT;
#endif
    netif_set_link_down(netif);
}

/* Public entry point: initialise the single EMAC instance (emac0). */
void ethnet_sun8i_eth_init(void)
{
    struct sun8i_emac *dev = &emac0;

    emac_init(dev);
}

struct netif* ethnet_sun8i_netif_get(void)
{
    return &emac0.netif;
}

